Contents

1 Preparations

library(Seurat)
library(tidyverse)
library(data.table)
library(reshape2)
library(ggpointdensity)
library(magrittr)

source(params$utils_path)
source(params$io_path)

date <- params$date
date_new <- params$date#format(Sys.Date(), '%Y-%m-%d')
section_name <- params$section_name
outdir <- file.path(OutFolder, section_name)
plotsdir <- file.path(HTMLFolder, "pipeline/", section_name, paste0(date_new, "/"))
if(!dir.exists(outdir)) dir.create(outdir)
if(!dir.exists(plotsdir)) dir.create(plotsdir)

# to render pngs in html
library(Cairo)
knitr::opts_chunk$set(fig.path = plotsdir, dev=c("CairoPNG", "pdf"))

2 Get target meta data incl. number of downstream genes

target_df <- read.csv(file.path(OutFolder, "7b_target_summary", paste0(date, "_target_meta_data.csv")))
target_df <- dplyr::filter(target_df, gene != "NonTarget")

2.1 Number of Effects in total

 target_df %>%
  dplyr::filter(n_cells >= min_cells_per_gene) %>%
  ggplot(aes(x=n_downstream, fill = group)) + geom_histogram() + facet_wrap(~group) +
  theme_classic() + scale_fill_manual(values = cols4Pools) +
  xlab("Number of genes affected by knockdown") + scale_x_log10() +
  guides(fill = "none")
## Warning: Transformation introduced infinite values in continuous x-axis
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2011 rows containing non-finite values (stat_bin).

target_df_mincells <- filter(target_df, !is.na(n_downstream_excl_target))
nrow(target_df_mincells)
## [1] 6673
gg <- target_df_mincells %>%
  ggplot(aes(x=pre_day10_lfc_mean, y = n_downstream_excl_target, col = group)) +
  geom_pointdensity() +
  scale_y_log10() + theme_bw() + guides(col = "none") +
  ylab("# DEGs") +
    xlab("Growth phenotype (day 10 LFC)") +
  scale_color_manual(values = cols4Pools) +
  ggpubr::stat_cor(col = "black", cor.coef.name = "rho", method = "spearman") +
  geom_smooth(method = "lm", col = "darkgrey", lty = "dashed", se = F)

ggExtra::ggMarginal(gg, groupColour = TRUE, type = "histogram", groupFill = TRUE, position = 'dodge')
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Warning: Removed 3300 rows containing non-finite values (stat_pointdensity).
## Warning: Removed 3300 rows containing non-finite values (stat_cor).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3300 rows containing non-finite values (stat_smooth).
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Warning: Removed 3300 rows containing non-finite values (stat_pointdensity).
## Warning: Removed 3300 rows containing non-finite values (stat_cor).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 3300 rows containing non-finite values (stat_smooth).

2.2 Top Hits

df4plot <- target_df %>% slice_max(order_by = n_downstream_excl_target, n = 25)
ggplot(df4plot, aes(y = reorder(gene, n_downstream_excl_target), x = n_downstream_excl_target, fill = group)) + 
  xlab('Number of Downstream DE Genes in Trans') + ylab('') + 
  scale_fill_manual(values = cols4Pools) + 
  geom_bar(stat = 'identity') + theme_bw()

## do strong and moderate
df4plot <- split.data.frame(target_df, f = target_df$group)
df4plot <- mapply(df4plot, FUN = function(df){
  rtn <- df %>% slice_max(order_by = n_downstream_excl_target, n = 25) %>%
    mutate(group = ifelse(group == "Moderate", "Non-Fitness Genes", "Fitness Genes"))
  p <- ggplot(rtn, aes(y = reorder(gene, n_downstream_excl_target), x = n_downstream_excl_target, fill = group)) + 
  facet_wrap(~group) + 
  xlab('Number of Downstream DE Genes in Trans') + ylab('') + 
  scale_fill_manual(values = cols4Pools_Renamed) + 
  geom_bar(stat = 'identity') + theme_bw() + theme(legend.position = 'none')
return(p)
}, SIMPLIFY = F)
ggpubr::ggarrange(plotlist = df4plot, ncol = 2)

#target_df %>% slice_max(order_by = n_downstream_excl_target, n = 25)

2.3 Add variances

# TODO this should be in step 7b and part of the target_df
# sc_variance is contained, but not variance at bulk level
var_donor <- read.table(file.path(OutFolder, "Preprocess_External_Datasets", "coexpression",  paste0("variance_in_expression_across_donors_magpie-", date, "-version.tsv")))

var_sc_tab <- read.table(file.path(OutFolder, "Preprocess_External_Datasets", "coexpression",  paste0("sc_regressed_variance_per_gene_magpie-", date, "-version.tsv")), sep = "\t", skip = 1)
var_sc <- var_sc_tab$V2
names(var_sc) <- var_sc_tab$V1
stopifnot(all(names(var_sc)  == rownames(var_donor)))
df_vars <- cbind(var_donor, var_sc) %>% rownames_to_column("target") %>% mutate(target_short =  gsub(".*?:", "",
                                                                                          gsub(":Gene-Expression", "",target)))

stopifnot(!any(duplicated(df_vars$target_short)))
target_df <- left_join(target_df,df_vars, by =c("gene" = "target_short"))

all(target_df$sc_variance == target_df$var_sc, na.rm = TRUE)
## [1] TRUE

2.4 Add conservation scores

# TODO this should be in step 7b and part of the target_df
cons_scores <- read.csv(file.path(ResourcesFolder,
                                  "conservation_scores",
                                  "conservation_scores_phastCons100way.UCSC.hg38.csv"))

 target_df <- left_join(target_df, cons_scores, by = c("gene" = "GeneSymbol"))
 table(is.na(target_df$Conservation))
## 
## FALSE  TRUE 
##  6671   555

2.5 Correct mean expression

# TODO: not needed any more with newer runs of the pipeline
if(params$date == "2022-05-31") {
  tmp <- fread("/lustre/scratch123/hgi/projects/crispri_scrnaseq/outs/Magpie/6b_calc_lfcs_transcriptome_wide_by_gene/2022-05-31_combined_with_adj_pval_REORDERED.tsv.gz")
  df_mean_expr <- tmp %>% group_by(downstream_gene) %>% summarise(control_norm_expr_corrected = unique(control_norm_expr))
  target_df <- left_join(target_df, df_mean_expr, by = c("target" = "downstream_gene"))
} else {
  target_df$control_norm_expr_corrected <- target_df$control_norm_expr
}

3 Plot correlation matrix

X <- target_df %>% dplyr::filter(!is.na(n_downstream_excl_target)) %>%
  dplyr::select(n_downstream_excl_target,
                n_cells,
                mean_effect_depmap,
                var_effect_depmap,
                growth_lfc_d3 = pre_day3_lfc_mean,
                growth_lfc_d4 = pre_day4_lfc_mean,
                growth_lfc_d5 = pre_day5_lfc_mean,
                growth_lfc_d9 = pre_day9_lfc_mean,
                growth_lfc_d10 = pre_day10_lfc_mean,
                n_coess_modules,
                mean_expression_control = control_norm_expr_corrected,
                n_PPIs = num_omnipath_interactions,
                n_protein_complexes = num_omnipath_protein_complexes,
                n_hallmark_gene_sets = num_msigdb_gene_sets,
                n_trans_effects_eQTLs = num_eQTLs_cis_gene,
                heritability = mean_var_expl_by_donor_kilpinen,
                target_lfc,
                var_across_donors,
                var_sc) %>%
  as.matrix()
dim(X)
## [1] 6673   19
n_targets_av <- apply(X,2, function(c) sum(!is.na(c))) # targets per annotation
n_targets_av
## n_downstream_excl_target                  n_cells       mean_effect_depmap 
##                     6673                     6673                     6533 
##        var_effect_depmap            growth_lfc_d3            growth_lfc_d4 
##                     6533                     6673                     6673 
##            growth_lfc_d5            growth_lfc_d9           growth_lfc_d10 
##                     6673                     6673                     6673 
##          n_coess_modules  mean_expression_control                   n_PPIs 
##                     6026                     4874                     6673 
##      n_protein_complexes     n_hallmark_gene_sets    n_trans_effects_eQTLs 
##                     6673                     6673                     6673 
##             heritability               target_lfc        var_across_donors 
##                     4418                     4874                     6033 
##                   var_sc 
##                     6033
summary(n_targets_av)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    4418    6033    6673    6249    6673    6673
sum(apply(is.na(X),1, function(r) sum(r))==0) # targets with all annotations
## [1] 2968
cor_mat <- cor(X, method = "spearman", use = "pairwise.complete")
my_heatmap(t(cor_mat), show_colnames = F, min_c= -1, max_c = 1, midpoint = 0)

 cor(X[,c("n_downstream_excl_target", "growth_lfc_d10")], method = "spearman")
##                          n_downstream_excl_target growth_lfc_d10
## n_downstream_excl_target                1.0000000     -0.2867884
## growth_lfc_d10                         -0.2867884      1.0000000
cor_mat["n_downstream_excl_target", "growth_lfc_d10"] # from 2B
## [1] -0.2867884
order_vars <- order(cor_mat["n_downstream_excl_target",colnames(cor_mat) != "n_downstream_excl_target"])
names_var <- names(cor_mat["n_downstream_excl_target",colnames(cor_mat) != "n_downstream_excl_target"])[order_vars]
pvals_cor_test <- sapply(names_var,
                         function(nm) cor.test(X[,nm], X[,"n_downstream_excl_target"], method = "spearman")$p.value)
## Warning in cor.test.default(X[, nm], X[, "n_downstream_excl_target"], method =
## "spearman"): Cannot compute exact p-value with ties

## Warning in cor.test.default(X[, nm], X[, "n_downstream_excl_target"], method =
## "spearman"): Cannot compute exact p-value with ties

## Warning in cor.test.default(X[, nm], X[, "n_downstream_excl_target"], method =
## "spearman"): Cannot compute exact p-value with ties

## Warning in cor.test.default(X[, nm], X[, "n_downstream_excl_target"], method =
## "spearman"): Cannot compute exact p-value with ties

## Warning in cor.test.default(X[, nm], X[, "n_downstream_excl_target"], method =
## "spearman"): Cannot compute exact p-value with ties

## Warning in cor.test.default(X[, nm], X[, "n_downstream_excl_target"], method =
## "spearman"): Cannot compute exact p-value with ties

## Warning in cor.test.default(X[, nm], X[, "n_downstream_excl_target"], method =
## "spearman"): Cannot compute exact p-value with ties

## Warning in cor.test.default(X[, nm], X[, "n_downstream_excl_target"], method =
## "spearman"): Cannot compute exact p-value with ties

## Warning in cor.test.default(X[, nm], X[, "n_downstream_excl_target"], method =
## "spearman"): Cannot compute exact p-value with ties

## Warning in cor.test.default(X[, nm], X[, "n_downstream_excl_target"], method =
## "spearman"): Cannot compute exact p-value with ties

## Warning in cor.test.default(X[, nm], X[, "n_downstream_excl_target"], method =
## "spearman"): Cannot compute exact p-value with ties

## Warning in cor.test.default(X[, nm], X[, "n_downstream_excl_target"], method =
## "spearman"): Cannot compute exact p-value with ties

## Warning in cor.test.default(X[, nm], X[, "n_downstream_excl_target"], method =
## "spearman"): Cannot compute exact p-value with ties

## Warning in cor.test.default(X[, nm], X[, "n_downstream_excl_target"], method =
## "spearman"): Cannot compute exact p-value with ties

## Warning in cor.test.default(X[, nm], X[, "n_downstream_excl_target"], method =
## "spearman"): Cannot compute exact p-value with ties

## Warning in cor.test.default(X[, nm], X[, "n_downstream_excl_target"], method =
## "spearman"): Cannot compute exact p-value with ties

## Warning in cor.test.default(X[, nm], X[, "n_downstream_excl_target"], method =
## "spearman"): Cannot compute exact p-value with ties

## Warning in cor.test.default(X[, nm], X[, "n_downstream_excl_target"], method =
## "spearman"): Cannot compute exact p-value with ties
padj <- p.adjust(pvals_cor_test, method = "BH")
names(padj)[padj < 0.05]
##  [1] "growth_lfc_d10"          "growth_lfc_d9"          
##  [3] "mean_effect_depmap"      "growth_lfc_d5"          
##  [5] "growth_lfc_d4"           "var_across_donors"      
##  [7] "target_lfc"              "var_sc"                 
##  [9] "n_hallmark_gene_sets"    "growth_lfc_d3"          
## [11] "n_coess_modules"         "mean_expression_control"
## [13] "n_protein_complexes"     "var_effect_depmap"      
## [15] "n_cells"
stopifnot(names(padj) == names_var)

nicer_names <- data.frame(
  old_name = c("n_downstream_excl_target", "var_effect_depmap", "n_protein_complexes",
               "conservation_score", "mean_expression_control",
               "n_coess_modules", "n_trans_effects_eQTLs",
               "n_PPIs", "heritability", "growth_lfc_d3",
               "n_hallmark_gene_sets",
               "var_across_donors",
               "n_cells",
               "growth_lfc_d4",
               "var_sc",
               "target_lfc",
               "growth_lfc_d5",
               "growth_lfc_d9",
               "growth_lfc_d10",
               "mean_effect_depmap"),
  new_name = c("n_downstream_excl_target", "Variance of Effect Across DepMap Lines", "Number of Protein Complexes",
            "Conservation Score", "Mean Expression (Control Cells)",
            "Number of Coessentiality Modules", "Number of Trans Effects (eQTLs)", "Number of Protein Interactions", "Heritability", 
            "Growth LFC (Day 3)", "Number of Hallmark Gene Sets", 
            "Variance of Expression (Bulk across Donors)", "Number of Cells", "Growth LFC (Day 4)", "Variance of Expression (Single-Cell)",
            "LFC of Target Gene","Growth LFC (Day 5)", "Growth LFC (Day 9)",
            "Growth LFC (Day 10)", "Mean Effect in DepMap")
)
               
colnames(cor_mat) <- nicer_names$new_name[match(row.names(cor_mat), nicer_names$old_name)]
my_heatmap(cor_mat["n_downstream_excl_target", nicer_names$new_name[match(names_var, nicer_names$old_name)], drop = FALSE],
           cluster_rows = FALSE, cluster_cols = FALSE, show_rownames = FALSE,
           display_numbers = t(as.matrix(ifelse(padj<0.001,"***", ifelse(padj<0.01,"**", ifelse(padj<0.05,"*",""))))))

## exclude some stuff
#df4plot <- cor_mat["n_downstream_excl_target", nicer_names$new_name[match(names_var, nicer_names$old_name)]]
#my_heatmap(, drop = FALSE],
#           cluster_rows = FALSE, cluster_cols = FALSE, show_rownames = FALSE,
#           display_numbers = t(as.matrix(ifelse(padj<0.001,"***", ifelse(padj<0.01,"**", #ifelse(padj<0.05,"*",""))))))
for(nm in names_var){
  print(qplot(X[,nm], X[, "n_downstream_excl_target"]) +
          geom_pointdensity() +
          ggpubr::stat_cor(method = "spearman", cor.coef.name = "rho") +
          xlab(nm) + ylab("# DEGs") + theme_bw() +
          scale_y_log10())
}

knitr::opts_chunk$set(eval = FALSE)

4 Plots of indivudal covariates

4.1 Growth effects

x <- target_df$growth_lfc_d9
y <- target_df$n_downstream_excl_target
q <- quantile(x, seq(0,1,0.1))
q[1] <- q[1] - 0.1
a <- target_df$group
xcut <- cut(x, breaks = q)
qplot(xcut, y)  + scale_y_log10() +
  xlab("growth_lfc_d9") + ylab("Number of downstream effects") +
  theme_bw() + geom_violin(aes(fill = as.numeric(xcut))) +
  geom_boxplot(width = 0.1) + theme(axis.text.x = element_text(angle = 90)) +
  guides(fill = "none")

qplot(x, y)  + scale_y_log10() +
  xlab("growth_lfc_d9") + ylab("Number of downstream effects") +
  theme_bw() + geom_point() + stat_smooth()

5 Growth effect

target_df %>%
  select(n_downstream_excl_target, group, starts_with("growth_lfc")) %>%
  gather(starts_with("growth_lfc"), key = "day", value = LFC_growth) %>%
  mutate(day = factor(day, levels = paste0("growth_lfc_d",1:11))) %>%
  ggplot(aes(x= LFC_growth, y=n_downstream_excl_target, col = group)) + geom_point() +
  facet_grid(day~group) +
  theme_classic() + scale_color_manual(values = cols4Pools) +
  xlab("LFC in growth screen") + ylab("Number of genes affected") +
  guides(col = "none")

ggplot(target_df, aes(x= group, y=n_downstream_excl_target, fill = group)) + geom_boxplot() +
  theme_classic() + scale_fill_manual(values = cols4Pools) +
  xlab("LFC of target gene") + ylab("Number of genes affected") +
  guides(fill = "none") + scale_y_log10() + ggpubr::stat_compare_means()

5.1 Number of cells

ggplot(target_df, aes(x= n_cells, y=n_downstream_excl_target, col = group)) + geom_point() +
  facet_wrap(~group) +
  theme_classic() + scale_color_manual(values = cols4Pools) +
  xlab("Number of cells / target") + ylab("Number of genes affected")
  guides(fill = "none")

5.2 Target expression

ggplot(target_df, aes(x= mean_expression_control, y=n_downstream_excl_target, col = group)) + geom_point() +
  facet_wrap(~group) +
  theme_classic() + scale_color_manual(values = cols4Pools) +
  xlab("Expression of target") + ylab("Number of genes affected")
  guides(fill = "none")

5.3 Target LFC

ggplot(target_df, aes(x= target_lfc, y=n_downstream_excl_target, col = group)) + geom_point() +
  facet_wrap(~group) +
  theme_classic() + scale_color_manual(values = cols4Pools) +
  xlab("LFC of target gene") + ylab("Number of genes affected")
  guides(fill = "none")

5.4 Variance

ggplot(target_df, aes(x= var_across_donors, y=n_downstream_excl_target, col = group)) + geom_point() +
  facet_wrap(~group) +
  theme_classic() + scale_color_manual(values = cols4Pools) +
  xlab("Target variance (pseudo-bulk, Cuomo et al)") + ylab("Number of genes affected")
  guides(fill = "none")
  
  ggplot(target_df, aes(x= var_sc, y=n_downstream_excl_target, col = group)) + geom_point() +
  facet_wrap(~group) +
  theme_classic() + scale_color_manual(values = cols4Pools) +
  xlab("Target variance (single-cell, Cuomo et al)") + ylab("Number of genes affected")
  guides(fill = "none")

5.5 Heritability

ggplot(target_df, aes(x= heritability, y=n_downstream_excl_target, col = group)) + geom_point() +
  facet_wrap(~group) +
  theme_classic() + scale_color_manual(values = cols4Pools) +
  xlab("Heritability (Kilpinen et al)") + ylab("Number of genes affected") +
  guides(fill = "none")

5.6 Conservation

ggplot(target_df, aes(x= conservation_phastCons100way, y=n_downstream_excl_target, col = group)) +
  geom_point() +
  facet_wrap(~group) +
  theme_classic() + scale_color_manual(values = cols4Pools) +
  xlab("conservation score (phastCons100way)") + ylab("Number of genes affected") +
  guides(fill = "none")

5.7 Interactions

ggplot(target_df, aes(x= num_omnipath_interactions, y=n_downstream_excl_target, col = group)) + geom_point() +
  facet_wrap(~group) +
  theme_classic() + scale_color_manual(values = cols4Pools) +
  xlab("# Omnipath interactions") + ylab("Number of genes affected") +
  guides(fill = "none")

ggplot(target_df, aes(x= num_msigdb_gene_sets, y=n_downstream_excl_target, col = group)) + geom_point() +
  facet_wrap(~group) +
  theme_classic() + scale_color_manual(values = cols4Pools) +
  xlab("# MSigDB gene set annotated in") + ylab("Number of genes affected") +
  guides(fill = "none")

ggplot(target_df, aes(x= num_omnipath_protein_complexes>0, y=n_downstream_excl_target, col = group)) + #geom_boxplot() +
  facet_wrap(~group) + stat_summary() +
  theme_classic() + scale_color_manual(values = cols4Pools) +
  xlab("Is in protein complex?") + ylab("Number of genes affected") +
  guides(fill = "none")

ggplot(target_df, aes(x= num_eQTLs_genes>0, y=n_downstream_excl_target, col = group)) + #geom_boxplot() +
  facet_wrap(~group) + stat_summary() +
  theme_classic() + scale_color_manual(values = cols4Pools) +
  xlab("Is in eQTL gene ?") + ylab("Number of genes affected") +
  guides(fill = "none")

5.8 DEG vs. Growth Phenotype

gg <- target_df %>%
  ggplot(aes(x=growth_lfc_d10, y = n_downstream, col = group)) +
  geom_pointdensity() +
  scale_y_log10() + theme_bw() + guides(col = "none") +
  ylab("Number of DEGs (padj < 0.1, |LFC| > 0.1)") +
    xlab("Growth phenotype (day 10 LFC)") +
  scale_color_manual(values = cols4Pools)

ggExtra::ggMarginal(gg, groupColour = TRUE, type = "histogram", groupFill = TRUE, position = 'dodge')

6 Multivariate Models

6.1 Linear model

target_df_scaled <- apply(select(target_df, where(is_numeric)),2,scale) %>%
  as.data.frame()
target_df_scaled$n_downstream_excl_target <- target_df$n_downstream_excl_target
# fit a model to predict number of downstream effects
fit <- glm(n_downstream_excl_target ~  n_cells + mean_effect_depmap + var_effect_depmap +
             growth_lfc_d3 + growth_lfc_d4 +  growth_lfc_d5 +
             growth_lfc_d9 +  growth_lfc_d10 +
             n_coess_modules + mean_expression_control +
             num_omnipath_interactions + num_omnipath_protein_complexes +
             num_msigdb_gene_sets + num_eQTLs_genes +
             heritability + conservation_phastCons100way +
             target_lfc +var_across_donors + var_sc,
           data = target_df_scaled, family = "poisson")
summary(fit)
in_order_vars <-  names(sort(fit$coefficients))
pvals <- melt(p.adjust(summary(fit)$coefficients[,4], method = "BH"), value.name = "padj") %>%
  rownames_to_column("variable") 
melt(coef(fit), value.name = "glm_coef") %>%
  rownames_to_column("variable") %>%
  left_join(pvals, by = "variable") %>% 
  filter(variable != "(Intercept)") %>%
  mutate(variable = factor(variable, levels = in_order_vars)) %>%
  ggplot(aes(x= variable, y = glm_coef, fill = glm_coef <0)) +
  geom_bar(stat = "identity") + coord_flip() +
  geom_text(aes(x= variable, y = glm_coef + 0.1 * sign(glm_coef),
                label = ifelse(padj<0.001,"***", ifelse(padj<0.01,"**", ifelse(padj<0.05,"*",""))))) +
  theme_bw() + scale_fill_manual(values = c("TRUE" = "navy", "FALSE" = "darkred")) +
  guides(fill = "none")

6.2 Elastic net

complete <- apply(target_df,1, function(x) !any(is.na(x)))
y <- target_df$n_downstream_excl_target
y <- y[complete]
X <- model.matrix( ~ 0 + n_cells + mean_effect_depmap + var_effect_depmap +
             growth_lfc_d3 + growth_lfc_d4 +  growth_lfc_d5 +
             growth_lfc_d9 +  growth_lfc_d10 +
             n_coess_modules + mean_expression_control +
             num_omnipath_interactions + num_omnipath_protein_complexes +
             num_msigdb_gene_sets + num_eQTLs_genes +
             heritability + conservation_phastCons100way +
             target_lfc + var_across_donors + var_sc, data = target_df[complete,])
X <- X[,colnames(X) != "groupModerate"]
X <- scale(X)
fit <- glmnet::cv.glmnet(y =y ,X, family = "poisson", alpha = 0.1)
in_order_vars <- names(sort(coef(fit)[,1]))
melt(coef(fit), value.name = "glmnet_coef") %>%
  rownames_to_column("variable") %>% filter(variable != "(Intercept)") %>%
  mutate(variable = factor(variable, levels = in_order_vars)) %>%
  ggplot(aes(x= variable, y = glmnet_coef, fill = glmnet_coef <0)) +
  geom_bar(stat = "identity") + coord_flip() +
  theme_bw() + scale_fill_manual(values = c("TRUE" = "navy", "FALSE" = "darkred")) +
  guides(fill = "none")

7 SessionInfo

sessionInfo()